#!/usr/bin/python
# Filename: compute_em_par.py

def _read_counts(counts_path):
    """Parse a counts file into (word, tag) counts and per-tag counts.

    Each line is split on single spaces.  Two record types are used, as
    identified by the second field:

    * ``<count> WORDTAG <tag> <word>`` -> ``words_tbl[(word, tag)] = count``
    * ``<count> 1-GRAM <tag>``         -> ``tag_count[tag] = count``

    Every other record type is ignored.  Counts are kept as the strings read
    from the file; they are converted to ``float`` when the ratio is computed.

    Returns:
        A ``(words_tbl, tag_count)`` tuple of dicts.
    """
    words_tbl = {}
    tag_count = {}
    # open() instead of file(): file() does not exist on Python 3.
    with open(counts_path) as counts_file:
        for line in counts_file:
            words = line.strip().split(' ')
            if len(words) < 3:
                # Blank or truncated line: both record types used here need
                # at least three fields, so there is nothing to read from it.
                continue
            record_type = words[1]
            if record_type == 'WORDTAG' and len(words) >= 4:
                words_tbl[(words[3], words[2])] = words[0]
            elif record_type == '1-GRAM':
                tag_count[words[2]] = words[0]
    return words_tbl, tag_count


def main(counts_path='ner.counts', output_path='emmission.dat'):
    """Write ``count(word, tag) / count(tag)`` for every WORDTAG record.

    One line per (word, tag) pair is written to ``output_path`` in the form
    ``<word> <tag> <ratio>``.  The default output name keeps the original
    spelling ('emmission.dat') because it is a runtime path
    (NOTE(review): presumably read by other scripts -- confirm before
    renaming).

    Args:
        counts_path: path of the counts file to read.
        output_path: path of the file to write.

    Raises:
        KeyError: a WORDTAG record uses a tag that has no 1-GRAM record.
        ValueError: a count field is not a number.
        ZeroDivisionError: a 1-GRAM count is zero.
    """
    # The whole input is read first: a tag's 1-GRAM record may appear after
    # the WORDTAG records that need it.
    words_tbl, tag_count = _read_counts(counts_path)
    with open(output_path, 'w') as out:
        for (word, tag), count in words_tbl.items():
            freq = float(count) / float(tag_count[tag])
            out.write(word + ' ' + tag + ' ' + str(freq) + '\n')


if __name__ == '__main__':
    main()
		
		
